********************************************************************************
***************** Extracting ICD-10 code lists from HES APC ********************
****************************** Chapter 4.1  ************************************


*     Note: The documentation file in the hesapc folder includes a data dictionary
*           describing the different files, and their structures, within hesapc.
*           Read it first to work out which files you need to use (eRAP).

*     We will go through how to extract the records that match the ICD-10 fracture codelist from the hesapc diagnosis files.



*     icd10 is the command we will be using; running the line below opens its help file.
*     (It is only documentation - nothing later in this file depends on it.)
help icd10





*     Setting globals
*     These three paths are placeholders and must be edited before running:
*       codes   - folder holding the reviewed codelist (fracture_icd10.dta)
*       hesapc  - folder holding the raw HES APC .txt files
*       working - folder where the .dta files created by this script are saved
*     Each path ends with a backslash and later code joins it as "$global\file",
*     so the final path contains a doubled separator.
*     NOTE(review): presumably harmless on Windows - confirm on your system.

global codes "My file path:\...\codes\Codelists\data\"
global hesapc "My file path:...\CPRD data _risks\HES APC\Type2_delivery\Results\Aurum_linked\Final\"
global working "My file path:\...\Stata files\Working\"




*     Let's format the patient file first so we have a list of acceptable patients.


**///////////////////////////// HES Patient //////////////////////////////////**
{
***************** Step 1: Import HES Patient file ******************************
*     Builds the list of patients whose HES linkage is acceptable.
*     Every column is read as a string. -clear- lets the import replace
*     whatever is currently in memory, so this section can be re-run at any time.

import delimited "$hesapc\hes_patient.txt", bindquote(nobind) stripquote(no) stringcols(_all) clear




***************** Step 2: Drop patients that have too many HES ID's (>20) ******

codebook n_patid_hes

*     n_patid_hes was imported as a string; swap it for a numeric copy so it
*     can be compared with a number. real() returns missing for non-numeric text.
generate n_patid_num = real(n_patid_hes)
drop n_patid_hes
rename n_patid_num n_patid_hes

*     Drop 'unacceptable' records.
*     Stata ranks missing above every number, so a bare ">20" also removes
*     records with a missing count. That behaviour is kept, but is now explicit.
drop if n_patid_hes > 20 | missing(n_patid_hes)




***************** Step 3: Look at quality of matching **************************
*     Look at quality of matching between AURUM and HES (perhaps drop rank>1)
codebook match_rank

*     match_rank is still a string here, hence the quoted "1".
keep if match_rank == "1"

*     Keep list of acceptable patid's
keep patid




***************** Step 4: Save acceptable patients list ************************
*     The later sections merge m:1 on patid against this file, which requires
*     one row per patid; stop here if that is not the case.
isid patid

*     Save list. -replace- allows the script to be re-run without the
*     "file already exists" error.
save "$working\HESlinked_acceptable_patients.dta", replace

}













*     Next we cover extracting the codelist from the different hesapc files.



**/////////////////////// Primary HES diagnoses //////////////////////////////**
{
***************** Step 1: Import hes_primary_diag_hosp.txt *********************
*     This is the Primary diagnoses across a hospitalisation .txt file.
*     All columns are read as strings (as in the other diagnosis sections), so
*     the date columns are guaranteed to be text when date() is applied below.
*     -clear- lets the import replace whatever is currently in memory.

import delimited "$hesapc\hes_primary_diag_hosp.txt", bindquote(nobind) stripquote(no) stringcols(_all) clear




***************** Step 2: Using icd-10 command *********************************

*     Check the codes in this file. With -fmtonly- only the FORMAT of each code
*     is checked, not whether it is a defined ICD-10 code; remove -fmtonly- to
*     check the codes against the 2019 definitions as well.
icd10 check icd_primary, fmtonly summary version(2019)




*     Create a copy of the code without dots, e.g. XXXX format instead of XXX.X,
*     named alt_code so that it matches the variable name in the codelist.
icd10 clean icd_primary, gen(alt_code) nodots




***************** Step 3: Merge with fracture icd10 reviewed codelist **********

merge m:1 alt_code using "$codes\fracture_icd10.dta"




***************** Step 4: Check merge ******************************************

*     _merge==2: codelist codes with no record in this file.
*     _merge==1: records whose code is not in the codelist.
browse if _merge==2
browse if _merge==1

*     We want to keep those that are matched in both codelist and primary diagnoses.
keep if _merge==3




***************** Step 5: Tidy the data ****************************************
*     Inspect modifier_4 and modifier_5 before dropping them, then drop them
*     together with the merge variable.

codebook modifier_4
codebook modifier_5
drop _merge modifier_4 modifier_5



*     Convert the text dates (day-month-year) to Stata dates, keeping the
*     original variable names. Unparseable text becomes missing.
foreach datevar in admidate discharged {
    generate `datevar'_td = date(`datevar', "DMY")
    format `datevar'_td %td
    drop `datevar'
    rename `datevar'_td `datevar'
}


*     Browse missing dates
browse if admidate==.
browse if discharged==.




***************** Step 6: Merge with acceptable patients ***********************
*     We only want those that have acceptable HESAPC records, so keep matched
*     observations only and do not keep a _merge variable.

merge m:1 patid using "$working\HESlinked_acceptable_patients.dta", keep(match) nogenerate


*     Save to working folder, you can change the name of this.
*     -replace- allows the script to be re-run without the "file already exists" error.
save "$working\fractures_icd10_primarydiaghosp.dta", replace

}









clear








**/////////////////////// HES diagnoses hospitalisation //////////////////////**
{
***************** Step 1: Import hes_diagnosis_hosp.txt ************************
*     This is the By hospitalisation .txt file. All columns are read as strings.
*     -clear- is also given on the import so that this section still runs when
*     it is executed on its own with other data in memory.

import delimited "$hesapc\hes_diagnosis_hosp.txt", bindquote(nobind) stripquote(no) stringcols(_all) clear




***************** Step 2: Using icd-10 command *********************************

*     Check the codes in this file. With -fmtonly- only the FORMAT of each code
*     is checked, not whether it is a defined ICD-10 code; remove -fmtonly- to
*     check the codes against the 2019 definitions as well.
icd10 check icd, fmtonly summary version(2019)




*     Create alt_code: the code without dots, e.g. XXXX format instead of XXX.X.
*     The name matches the variable in the codelist.
icd10 clean icd, gen(alt_code) nodots




***************** Step 3: Merge with fracture icd10 reviewed codelist **********

merge m:1 alt_code using "$codes\fracture_icd10.dta"




***************** Step 4: Check merge ******************************************

*     _merge==2: codelist codes with no record in this file.
*     _merge==1: records whose code is not in the codelist.
browse if _merge==2
browse if _merge==1

*     We want to keep those that are matched in both codelist and diagnoses hospitalisation.
keep if _merge==3




***************** Step 5: Tidy the data ****************************************
*     Inspect modifier_4 and modifier_5 before dropping them, then drop them
*     together with the merge variable.

codebook modifier_4
codebook modifier_5
drop _merge modifier_4 modifier_5


*     Convert the text dates (day-month-year) to Stata dates, keeping the
*     original variable names. Unparseable text becomes missing.
foreach datevar in admidate discharged {
    generate `datevar'_td = date(`datevar', "DMY")
    format `datevar'_td %td
    drop `datevar'
    rename `datevar'_td `datevar'
}


*     Browse if missing dates
browse if admidate==.
browse if discharged==.




***************** Step 6: Merge with acceptable patients ***********************
*     We only want those that have acceptable HESAPC records, so keep matched
*     observations only and do not keep a _merge variable.

merge m:1 patid using "$working\HESlinked_acceptable_patients.dta", keep(match) nogenerate


*     Save to working folder, you can change the name of this.
*     -replace- allows the script to be re-run without the "file already exists" error.
save "$working\fractures_icd10_diaghosp.dta", replace

}








clear








**/////////////////////// HES diagnoses episode //////////////////////////////**
{
***************** Step 1: Import hes_diagnosis_epi.txt *************************
*     This is the By episode .txt file. All columns are read as strings.
*     -clear- is also given on the import so that this section still runs when
*     it is executed on its own with other data in memory.

import delimited "$hesapc\hes_diagnosis_epi.txt", bindquote(nobind) stripquote(no) stringcols(_all) clear




***************** Step 2: Using icd-10 command *********************************

*     Check the codes in this file. With -fmtonly- only the FORMAT of each code
*     is checked, not whether it is a defined ICD-10 code; remove -fmtonly- to
*     check the codes against the 2019 definitions as well.
icd10 check icd, fmtonly summary version(2019)




*     Create alt_code: the code without dots, e.g. XXXX format instead of XXX.X.
*     The name matches the variable in the codelist.
icd10 clean icd, gen(alt_code) nodots




***************** Step 3: Merge with fracture icd10 reviewed codelist **********

merge m:1 alt_code using "$codes\fracture_icd10.dta"




***************** Step 4: Check merge ******************************************

*     _merge==2: codelist codes with no record in this file.
*     _merge==1: records whose code is not in the codelist.
browse if _merge==2
browse if _merge==1

*     We want to keep those that are matched in both codelist and diagnoses episode.
keep if _merge==3




***************** Step 5: Tidy the data ****************************************
*     Inspect modifier_4 and modifier_5 before dropping them, then drop them
*     together with the merge variable.

codebook modifier_4
codebook modifier_5
drop _merge modifier_4 modifier_5


*     Convert the text episode dates (day-month-year) to Stata dates, keeping
*     the original variable names. Unparseable text becomes missing.
foreach datevar in epistart epiend {
    generate `datevar'_td = date(`datevar', "DMY")
    format `datevar'_td %td
    drop `datevar'
    rename `datevar'_td `datevar'
}


*     Check if missing dates
browse if epistart==. | epiend==.




***************** Step 6: Merge with acceptable patients ***********************
*     We only want those that have acceptable HESAPC records, so keep matched
*     observations only and do not keep a _merge variable.

merge m:1 patid using "$working\HESlinked_acceptable_patients.dta", keep(match) nogenerate


*     Save to working folder, you can change the name of this.
*     -replace- allows the script to be re-run without the "file already exists" error.
save "$working\fractures_icd10_diagepi.dta", replace
}

